排序
1> 按标签排序
使用 sort_index() 对 DataFrame 进行排序,默认情况下,按照升序对行标签进行排序
# Create a 5x2 frame of random numbers whose row labels are deliberately shuffled.
row_labels = [1, 4, 3, 2, 0]
un_sorted_df = pd.DataFrame(
    np.random.randn(5, 2),
    index=row_labels,
    columns=['col1', 'col2'],
)
print(f'排序前:\n{un_sorted_df}')
# Sample output (values come from np.random.randn, so they are illustrative only):
# 排序前:
# col1 col2
# 1 1.384395 1.761308
# 4 -0.421264 -0.978312
# 3 1.999370 0.127830
# 2 0.856805 0.713296
# 0 -0.441599 1.015312
# Called without arguments, sort_index() orders the rows by label, ascending.
sorted_by_label = un_sorted_df.sort_index()
print(f'排序后:\n{sorted_by_label}')
# 输出结果:
# 排序后:
# col1 col2
# 0 -0.441599 1.015312
# 1 1.384395 1.761308
# 2 0.856805 0.713296
# 3 1.999370 0.127830
# 4 -0.421264 -0.978312
2> 排序顺序
# Frame of random numbers with shuffled row labels, used to show descending order.
random_values = np.random.randn(5, 2)
un_sorted_df = pd.DataFrame(
    random_values,
    index=[1, 4, 3, 2, 0],
    columns=['col1', 'col2'],
)
print(f'排序前:\n{un_sorted_df}')
# Sample output (values come from np.random.randn, so they are illustrative only):
# 排序前:
# col1 col2
# 1 1.384395 1.761308
# 4 -0.421264 -0.978312
# 3 1.999370 0.127830
# 2 0.856805 0.713296
# 0 -0.441599 1.015312
# ascending=False reverses the order: the largest row label comes first.
descending_by_label = un_sorted_df.sort_index(ascending=False)
print(f'排序后:\n{descending_by_label}')
# 输出结果:
# 排序后:
# col1 col2
# 4 -0.421264 -0.978312
# 3 1.999370 0.127830
# 2 0.856805 0.713296
# 1 1.384395 1.761308
# 0 -0.441599 1.015312
3> 按列排序
# The column labels are given out of order ('col2' before 'col1') so that
# sorting along the column axis has a visible effect.
column_labels = ['col2', 'col1']
un_sorted_df = pd.DataFrame(
    np.random.randn(5, 2),
    index=[1, 4, 3, 2, 0],
    columns=column_labels,
)
print(f'排序前:\n{un_sorted_df}')
# Sample output (values come from np.random.randn, so they are illustrative only):
# 排序前:
# col2 col1
# 1 -0.624151 0.113447
# 4 0.419938 -0.805421
# 3 0.247321 0.260455
# 2 -0.464907 -0.453989
# 0 0.454340 -1.243215
# axis=1 sorts the column labels; the row order is left untouched.
sorted_by_column = un_sorted_df.sort_index(axis=1)
print(f'排序后:\n{sorted_by_column}')
# 输出结果:
# 排序后:
# col1 col2
# 1 0.113447 -0.624151
# 4 -0.805421 0.419938
# 3 0.260455 0.247321
# 2 -0.453989 -0.464907
# 0 -1.243215 0.454340
4> 按值排序
# Small integer frame with repeated values in col1, so the second sort key matters.
column_data = {'col1': [2, 1, 1, 1], 'col2': [1, 3, 2, 4]}
un_sorted_df = pd.DataFrame(column_data)
print(f'排序前:\n{un_sorted_df}')
# Output:
# 排序前:
# col1 col2
# 0 2 1
# 1 1 3
# 2 1 2
# 3 1 4
# Rows are ordered by col1 first; rows with equal col1 are then ordered by col2.
sort_keys = ["col1", "col2"]
sorted_by_value = un_sorted_df.sort_values(by=sort_keys)
print(f'排序后:\n{sorted_by_value}')
# 输出结果:
# 排序后:
# col1 col2
# 2 1 2
# 1 1 3
# 3 1 4
# 0 2 1
5> 排序算法
- mergesort(稳定)
- heapsort
- quicksort
# Build the frame explicitly for this example. In the original notes this
# statement had been fused onto the end of a comment line, so it never ran and
# the snippet depended on a variable left over from an earlier example.
un_sorted_df = pd.DataFrame({'col1': [2, 1, 1, 1], 'col2': [1, 3, 2, 4]})
print(f'排序前:\n{un_sorted_df}')
# Output:
# 排序前:
# col1 col2
# 0 2 1
# 1 1 3
# 2 1 2
# 3 1 4
# kind="mergesort" requests the stable algorithm from the list above when
# sorting by the single column col2.
print(f'排序后:\n{un_sorted_df.sort_values(by="col2", kind="mergesort")}')
# 输出结果:
# 排序后:
# col1 col2
# 0 2 1
# 2 1 2
# 1 1 3
# 3 1 4